---
title: "Cleaning Candidate Eurobarometer"
---

# Load

```{r}
# attach project packages by running the shared helper script
  source("helper-packages.R")

# read the raw Candidate Eurobarometer (2002) Stata file into memory
  candidate_eurobarometer_raw <- read_dta(
    file = "../raw-data/y-multi-candidate-eurobarometer/candidate-eurobarometer-2002.dta"
  )
```

# Clean Candidate Eurobarometer 2002

```{r}
# declare dates
#
# Fieldwork window per country, keyed by `resp_country_common` so it can be
# joined onto the respondent-level data. Dates are written as ISO 8601
# (YYYY-MM-DD) and parsed with a numeric-only format so that parsing does not
# depend on the session's LC_TIME locale (month-name formats such as "%b"
# silently yield NA under non-English locales).
  c_eb_dates <- 
    tribble(
      ~resp_country_common, ~resp_interview_start_date, ~resp_interview_end_date,
      "Bulgaria",  "2002-09-15", "2002-09-28",
      "Cyprus",    "2002-09-02", "2002-09-25",
      "Czechia",   "2002-09-02", "2002-09-30",
      "Estonia",   "2002-09-05", "2002-09-22",
      "Hungary",   "2002-09-04", "2002-09-22",
      "Latvia",    "2002-09-13", "2002-09-30",
      "Lithuania", "2002-09-06", "2002-09-17",
      "Malta",     "2002-09-05", "2002-09-26",
      "Poland",    "2002-09-01", "2002-09-22",
      "Romania",   "2002-09-09", "2002-09-30", # codebook lists Sept. 31, 2002 (not a real date); Sept. 30 used instead
      "Slovakia",  "2002-09-16", "2002-10-15",
      "Slovenia",  "2002-09-20", "2002-10-09",
      "Turkey",    "2002-09-02", "2002-09-26"
      ) %>% 
    mutate(
      # convert the ISO strings to class Date
      resp_interview_start_date = as.Date(resp_interview_start_date, format = "%Y-%m-%d"),
      resp_interview_end_date = as.Date(resp_interview_end_date, format = "%Y-%m-%d"))

# clean
#
# Builds the harmonised respondent-level table from the raw survey file.
# Every derived column is prefixed `resp_`; the final select() keeps only
# those columns (including the interview start/end dates joined in from
# `c_eb_dates`) and drops all raw survey variables.
  clean_candidate_eurobarometer <- 
    candidate_eurobarometer_raw %>% 
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "Candidate Eurobarometer",
        
      # round number (character vector, title case)  
        resp_round = "2002",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3t46CBQ",

      # survey mode (in-person/phone/internet)
        resp_survey_mode = "in-person",    

      # country (character vector; list of countries as written in original source)
      # NOTE(review): codes 2 and 14 both map to "Cyprus" -- confirm against codebook
        resp_country_original = dplyr::recode(
            as.character(country),
            "1" = "Bulgaria",
            "2" = "Cyprus",
            "3" = "Czech Republic",
            "4" = "Estonia",
            "5" = "Hungary",
            "6" = "Latvia",
            "7" = "Lithuania",
            "8" = "Malta",
            "9" = "Poland",
            "10" = "Romania",
            "11" = "Slovakia",
            "12" = "Slovenia",
            "13" = "Turkey",
            "14" = "Cyprus",
            .default = NA_character_),

      # country (character vector; converts to countrycode county.name list)
      # this standardised name is also the join key into `c_eb_dates`
        resp_country_common = countryname(resp_country_original),
        
      # interview date (variable of class Date; if only month given, input 1st of month)
      # exact interview day is not set here; only the per-country fieldwork
      # window (start/end) is attached by the join below
        resp_interview_date = NA) %>% 
        left_join(
          c_eb_dates, by = "resp_country_common") %>% 
        mutate(
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's religion (character vector that corresponds to master list)
      # codes 1-4 are collapsed to "Christian"; code 10 and any unlisted code become NA
        resp_religion = 
          dplyr::recode(
            as.character(d11_a),
            "1" = "Christian",
            "2" = "Christian",
            "3" = "Christian",
            "4" = "Christian",
            "5" = "Jewish",
            "6" = "Muslim",
            "7" = "Buddhist",
            "8" = "Hindu",
            "9" = "Other religion",
            "10" = NA_character_,
            .default = NA_character_),

      # respondent's denomination (character vector that corresponds to master list)
        resp_denomination = to_character(d11_a),
    
      # respondent's age
      # d4 is the year of birth (last two digits) and the interview year is 2002,
      # so age = 2002 - (1900 + d4) = 102 - d4; code 99 is treated as missing
        resp_age = 
          as.character(
            case_when(
              d4 == 99 ~ NA_real_,
              d4 != 99 ~ as.numeric(102 - d4))),
    
      # respondent's education level
      # one country-specific variable per country (d3_b_<country>); they are
      # converted to character first so they can be compared to string codes.
      # case_when() returns the label of the first matching country variable;
      # rows matching none get NA. The bracketed suffix of each label is the
      # harmonised category.
        across(starts_with("d3_b_"), as.character),
        resp_education_original = case_when(
          d3_b_bg == '1' ~ "1. Elementary (incl. no education) [No education]",
          d3_b_bg == '2' ~ "2. Primary [Primary]",
          d3_b_bg == '3' ~ "3. Secondary (incl. college / semi-higher) [Primary]",
          d3_b_bg == '4' ~ "4. University (higher) [College]",
          
          d3_b_cy == '1' ~ "1. Not completed primary [No education]",
          d3_b_cy == '2' ~ "2. Primary school [Primary]",
          d3_b_cy == '3' ~ "3. Gymnasium (3 years) [Primary]",
          d3_b_cy == '4' ~ "4. Lyceum (completed secondary) [Primary]",
          d3_b_cy == '5' ~ "5. College [Primary]",
          d3_b_cy == '6' ~ "6. University [College]",
          
          d3_b_cz == '1' ~ "1. Primary school [Primary]",
          d3_b_cz == '2' ~ "2. Secondary school [Primary]",
          d3_b_cz == '3' ~ "3. Non-university degree [Primary]",
          d3_b_cz == '4' ~ "4. University degree or more [College]",
          
          d3_b_ee == '1' ~ "1. Basic and less [Primary]",
          d3_b_ee == '2' ~ "2. Secondary education [Primary]",
          d3_b_ee == '3' ~ "3. Higher education [College]",
          
          d3_b_hu == '1' ~ "1. Uncompleted primary school [No education]",
          d3_b_hu == '2' ~ "2. Primary school [Primary]",
          d3_b_hu == '3' ~ "3. Three or less-year secondary school [Primary]",
          d3_b_hu == '4' ~ "4. Four or five-year secondary school [Primary]",
          d3_b_hu == '5' ~ "5. College and university degree [College]",
          
          d3_b_lv == '1' ~ "1. Primary/Basic or less [Primary]",
          d3_b_lv == '2' ~ "2. Secondary [Primary]",
          d3_b_lv == '3' ~ "3. Secondary special [Primary]",
          d3_b_lv == '4' ~ "4. Higher [College]",
          
          d3_b_lt == '1' ~ "1. Primary/Basic or less [Primary]",
          d3_b_lt == '2' ~ "2. Secondary [Primary]",
          d3_b_lt == '3' ~ "3. Secondary special [Primary]",
          d3_b_lt == '4' ~ "4. Higher [College]",
          
          d3_b_mt == '1' ~ "1. Never attended school [No education]",
          d3_b_mt == '2' ~ "2. Up to primary level [Primary]",
          d3_b_mt == '3' ~ "3. Secondary/trade school [Primary]",
          d3_b_mt == '4' ~ "4. Post secondary (incl. higher secondary, 6th form & college) [Primary]",
          d3_b_mt == '5' ~ "5. University [College]",
          
          d3_b_pl == '1' ~ "1. Uncompleted primary school [No education]",
          d3_b_pl == '2' ~ "2. Primary school [Primary]",
          d3_b_pl == '3' ~ "3. Basic vocational [Primary]",
          d3_b_pl == '4' ~ "4. General and technical secondary school [Primary]",
          d3_b_pl == '5' ~ "5. University degree or more [College]",
          
          d3_b_ro == '1' ~ "1. No degree [No education]",
          d3_b_ro == '2' ~ "2. Primary school [Primary]",
          d3_b_ro == '3' ~ "3. Gymnasium [Primary]",
          d3_b_ro == '4' ~ "4. High school - level 1 [Primary]",
          d3_b_ro == '5' ~ "5. Vocational school [Primary]",
          d3_b_ro == '6' ~ "6. High school - level 2 [Primary]",
          d3_b_ro == '7' ~ "7. Post high/college [Primary]",
          d3_b_ro == '8' ~ "8. University degree or more [College]",
          
          # Slovakia: raw code 5 is mapped to the lowest label and codes 1-4
          # are shifted up by one
          d3_b_sk == '5' ~ "1. Without education [No education]",
          d3_b_sk == '1' ~ "2. Elementary [Primary]",
          d3_b_sk == '2' ~ "3. Secondary school without school leaving exam -  apprenticed [Primary]",
          d3_b_sk == '3' ~ "4. Secondary school with school leaving exam [Primary]",
          d3_b_sk == '4' ~ "5. University [College]",
          
          d3_b_si == '1' ~ "1. Uncompleted primary school [No education]",
          d3_b_si == '2' ~ "2. Primary school [Primary]",
          d3_b_si == '3' ~ "3. Three or less-year secondary school [Primary]",
          d3_b_si == '4' ~ "4. Four or five-year secondary school [Primary]",
          d3_b_si == '5' ~ "5. Non-university degree [Primary]",
          d3_b_si == '6' ~ "6. University degree or more [College]",
          
          d3_b_tk == '1' ~ "1. No education [No education]",
          d3_b_tk == '2' ~ "2. Uncompleted primary school [No education]",
          d3_b_tk == '3' ~ "3. Primary school [Primary]",
          d3_b_tk == '4' ~ "4. Secondary school [Primary]",
          d3_b_tk == '5' ~ "5. High school [Primary]",
          d3_b_tk == '6' ~ "6. University degree or more [College]"),
      
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
      # explicit mapping so that any code other than 1/2 (and missing values)
      # becomes NA instead of being counted as male
      # NOTE(review): assumes d14 is coded 1 = male, 2 = female -- confirm against codebook
        resp_female = 
          case_when(
            d14 == 2 ~ 1,
            d14 == 1 ~ 0,
            TRUE ~ NA_real_),
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
        resp_rural = 
          dplyr::recode(
            as.character(d10),
            "1" = 1,
            "2" = 0,
            "3" = 0,
            .default = NA_real_),  
      
    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: q54.c; QTEXT: Some people are disturbed by the opinions, customs and way of life of people different from themselves. And do you find the presence of people of another religion disturbing?; ROPTIONS: 1 = Disturbing [=1] + 2 = Not disturbing [=0]; TARGET: Different religion; TYPE: Discomfort",
      
      # original response (as character vector)
        resp_soc_dist_1_original = as.character(q54_c),   

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
      # any code other than 1/2 (including missing) becomes NA
        resp_soc_dist_1_bin_recode = 
          case_when(
            q54_c %in% c(1) ~ 1,
            q54_c %in% c(2) ~ 0,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # the three general-trust columns are set to NA placeholders in this file
    
      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = NA,
    
      # original response (as character vector)
        resp_gentrust_original = NA,      

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_gentrust_bin_recode = NA, 
    
    #########################  
    ##### RELIGIOSITY #######  
    #########################
    
      # original question number; question text; response options (input above)
        resp_religiosity_qinfo = "NUM: d11.b; QTEXT: Do you attend religious services other than weddings or funerals several times a week, once a week, a few times a year, once a year or less, or never?; ROPTIONS: 1 = Several times a week + 2 = Once a week + 3 = A few times a year + 4 = Once a year or less + 5 = Never",
  
      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
      # NOTE(review): only codes 8 and 9 are blanked; any other code outside 1-5
      # would pass through and fall outside 0-1 after rescaling -- confirm against codebook
        resp_religiosity_original = 
          dplyr::recode(
            as.numeric(d11_b),
            "8" = NA_real_,
            "9" = NA_real_),       

      # recode (numeric: scaled 0-1, where 1 is more religious)
      # 1 (several times a week) -> 1; 5 (never) -> 0
        resp_religiosity_recode = (5 - resp_religiosity_original)/4

    ) %>% 
    select(starts_with("resp_"))
```

# Save data

```{r}
  # write the cleaned respondent-level table to the cleaned-data folder
  saveRDS(
    object = clean_candidate_eurobarometer,
    file = "../cleaned-data/y-21-multi-candidate-eurobarometer.rds"
  )
```
